All pandas objects are value-mutable, but not all of them are size-mutable: the length of a Series cannot be changed, but columns can be inserted into a DataFrame.
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [5]:
# Creating a Series object from values of mixed types
# (ints, a missing value, a string and a complex number)
mixed_values = [1, 2, 3, np.nan, 'a', 4 + 5j]
s = pd.Series(data=mixed_values)
s
Out[5]:
In [8]:
# To generate dates
dates = pd.date_range('20130125', periods = 8)
dates
Out[8]:
In [12]:
# Creating a DataFrame: 6 rows x 4 columns of random values,
# with the generated dates as the row index and A-D as column labels
dates = pd.date_range(start='20180101', periods=6)
random_values = np.random.rand(6, 4)
df = pd.DataFrame(data=random_values, index=dates, columns=['A', 'B', 'C', 'D'])
df
Out[12]:
In [15]:
# Creating a DataFrame from a dictionary: each key becomes a column.
# The scalar under 'A' is repeated for every row, while 'B' and 'C'
# each supply one value per row.
temp_dict = dict(
    A=1,
    B=pd.Series([2, 3, 4, 5]),
    C=np.array([3, 3, 3, 3], dtype='int32'),
)
df2 = pd.DataFrame(data=temp_dict)
df2
Out[15]:
In [16]:
# To get the dtype for every column
df2.dtypes
Out[16]:
In [20]:
# To get the first x rows use df.head(x) (default x = 5)
df.head()
Out[20]:
In [21]:
# To view last x entries
df.tail(3)
Out[21]:
In [22]:
# To get the index of the dataframe
df.index
Out[22]:
In [23]:
# To get index of columns
df.columns
Out[23]:
In [24]:
# To get a quick statistic summary of a dataFrame
df.describe()
Out[24]:
In [26]:
# To transpose your DataFrame
df.T
Out[26]:
In [29]:
# To sort by an axis
# axis=0 sorts by the row index, i.e. the dates get sorted in descending order
df.sort_index(axis=0, ascending=False)
Out[29]:
In [31]:
# axis=1 sorts by the column labels (here in descending order)
df.sort_index(axis=1, ascending=False)
Out[31]:
In [34]:
# To sort by specific values
df.sort_values(by='C', ascending=False)
Out[34]:
In [35]:
# For selecting elements
In [36]:
# To select columns
df['A']
Out[36]:
In [37]:
df[0:3]
Out[37]:
In [39]:
df[0:4].A
Out[39]:
In [46]:
# To select by labels
dates
Out[46]:
In [51]:
# To select a row
df.loc['2018-01-01']
Out[51]:
In [53]:
# To select from multiple columns
df.loc[:, ['A', 'B']]
Out[53]:
In [56]:
# To get the cell values
df.loc['2018-01-01', 'B']
Out[56]:
In [63]:
# Use .at for faster access to a single value than .loc
df.at[dates[0], 'B'] # Where dates[0] = row name
Out[63]:
In [67]:
dates[0]
Out[67]:
In [68]:
df
Out[68]:
In [66]:
# To select by integer position instead of by index label
df.iloc[3]
Out[66]:
In [69]:
# Here the first argument selects the rows by position and
# the second argument selects the columns by position (slice ends are excluded)
df.iloc[1:3 , 0:3]
Out[69]:
In [76]:
# To select specific rows by position (keeping all columns)
df.iloc[[1,2,4], :]
Out[76]:
In [71]:
df.iloc[0,1]
Out[71]:
In [72]:
# To get fast access use
df.iat[0,1]
Out[72]:
In [73]:
df.iat[0,1] = 1
In [74]:
df.at[dates[0], 'B'] = 2
In [75]:
df.iat[0,1]
Out[75]:
In [79]:
# To filter a DataFrame with a boolean condition
df[df.A > 0.5]
Out[79]:
In [83]:
df[df > 0.5]
Out[83]:
In [95]:
# To add new columns to DataFrame
df['E'] = ['one', 'one','two','three','four','three']
df
Out[95]:
In [87]:
# A Series assigned as a column is aligned on index labels, not on position.
# Give it the DataFrame's own index: a default 0..5 integer index matches
# none of the date labels and would leave column F entirely NaN.
df['F'] = pd.Series([1, 2, 3, 4, 5, 6], index=df.index)
df
Out[87]:
In [88]:
# To select specific rows with desired values
df[df['E'].isin(['two', 'three'])]
Out[88]:
In [96]:
# Missing data
# By default represented by np.nan
df = df[df > 0.5]
df
Out[96]:
In [97]:
del df['E']
df
Out[97]:
In [99]:
# To conform the DataFrame to new row/column labels (labels not already present are filled with NaN)
df.reindex(index=['one', 'two', 'three', 'four', 'five', 'six'],
columns=list('ZYXWV'))
Out[99]:
In [101]:
# To drop any row having missing values
df.dropna(how='any')
# A row containing even a single NaN is dropped
Out[101]:
In [102]:
# All the entries in a row must be NaN for the row to get dropped
df.dropna(how='all')
Out[102]:
In [104]:
# To replace Nan with desired values
df.fillna(value='Value Filled')
Out[104]:
In [106]:
# To get a boolean mask of the DataFrame
df.isna()
# False if not Nan and True if Nan
Out[106]:
In [107]:
# Various operations on a DataFrame
In [114]:
# Fresh 6x4 DataFrame of random values scaled by 10,
# indexed by six dates starting 2018-01-01, with columns A-D
day_index = pd.date_range(start='20180101', periods=6)
scaled_values = np.random.rand(6, 4) * 10
df = pd.DataFrame(data=scaled_values, index=day_index, columns=['A', 'B', 'C', 'D'])
df
Out[114]:
In [117]:
df.mean() # along axis = 0
Out[117]:
In [118]:
# To apply an arbitrary function to each column
df.apply(lambda x: x.max()-x.min())
Out[118]:
In [120]:
# To get value counts
df['A'].value_counts()
Out[120]:
In [123]:
# To combine two DataFrames on their shared column, use pd.merge.
# Both frames repeat the key 'foo', so every left row pairs with every right row.
shared_keys = ['foo', 'foo']
left = pd.DataFrame({'key': shared_keys, 'lval': [1, 2]})
right = pd.DataFrame({'key': shared_keys, 'rval': [4, 5]})
for frame in (left, right):
    print(frame)
pd.merge(left, right)
Out[123]: